suppressPackageStartupMessages(library(scMET))
suppressPackageStartupMessages(library(BASiCS))
suppressPackageStartupMessages(library(SingleCellExperiment))
suppressPackageStartupMessages(library(ggpubr))
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(purrr))
suppressPackageStartupMessages(library(viridis))
suppressPackageStartupMessages(library(ggplot2))
#####################
## Define settings ##
#####################
source("../load_settings.R")
data_dir <- "/Users/ckapoura/datasets/scMET_ms/gastrulation/data/"
hits_dir <- "/Users/ckapoura/datasets/scMET_ms/gastrulation/metrna/hits/"
pdf_dir <- "/Users/ckapoura/datasets/scMET_ms/gastrulation/metrna/analysis/"
if (!dir.exists(hits_dir)) {dir.create(hits_dir, recursive = TRUE)}
if (!dir.exists(pdf_dir)) {dir.create(pdf_dir, recursive = TRUE)}
# Define genomic context
opts$anno <- c("prom_2000_2000")
#opts$anno <- c("first_intron")# Load BASiCS object
basics_obj <- readRDS(file = paste0(data_dir, "/rna_basics.rds"))
# SingleCellExperiment object
sce <- readRDS(io$rna)[, sample_metadata$id_rna]
# Perform HVG analysis
HVG <- BASiCS_DetectVG(basics_obj, PercentileThreshold = 0.9,
EFDR = 0.01, Plot = FALSE)Posterior probability threshold has been supplied
EFDR will not be calibrated.
hvg_results <- HVG@Table[, c("GeneName", "Prob", "HVG")]
colnames(hvg_results) <- c("ens_id", "rna_tail_prob", "rna_is_variable")
# Obtain posterior draws summary
chain_summary <- Summary(basics_obj)
# Merge Ensemb IDs with gene symbol
subset_meta <- rowData(sce[rownames(chain_summary@parameters$mu)]) %>%
as.data.table(keep.rownames = "ens_id") %>%
merge(., hvg_results, by = "ens_id")Arguments in '...' ignored
# Get posterior medians summary
post_summary <- data.table(ens_id = rownames(chain_summary@parameters$mu),
rna_mu = log(chain_summary@parameters$mu[, 1]),
rna_epsilon = chain_summary@parameters$epsilon[, 1],
rna_delta = log(chain_summary@parameters$delta[, 1]) )# Show mean-overdispersion relationship
plot(BASiCS_ShowFit(basics_obj))# Combine data
joint_rna_dt <- merge(subset_meta, post_summary, by = "ens_id") %>%
setorder(-rna_epsilon)
to.plot <- joint_rna_dt[rna_mu > 0.1]
tmp <- to.plot[to.plot$rna_is_variable == FALSE, ]
tmp <- tmp[sample(NROW(tmp), 2000), ]
to.plot <- rbind(to.plot[to.plot$rna_is_variable == TRUE, ], tmp)
mode <- c("rna_delta", "rna_epsilon")
y_lab <- list(
"rna_delta" = "RNA log overdispersion",
"rna_epsilon" = "RNA residual overdispersion"
)
for (m in mode) {
gg <- ggplot(to.plot, aes_string(x = "rna_mu", y = m)) +
geom_point(aes(fill = rna_is_variable, alpha = rna_is_variable, size = rna_is_variable),
shape = 21, stroke = 0.01) +
scale_fill_manual(values = c("gray80","red")) +
scale_alpha_manual(values = c(0.4, 0.6)) +
scale_size_manual(values = c(0.65, 1.6)) +
guides(fill = guide_legend(override.aes = list(size = 2.5))) +
xlab("Log mean expression") + ylab(y_lab[[m]]) +
ggrepel::geom_text_repel(data = head(to.plot[rna_is_variable == TRUE], n = 20),
aes_string(x = "rna_mu", y = m, label = "symbol"),
size = 3, color = "black") +
theme_classic() +
theme(
legend.position = "none",
strip.text = element_text(color = "black", size = rel(1.1)),
legend.text = element_text(color = "black", size = rel(1.15)),
axis.text = element_text(color = "black", size = rel(1)),
axis.title = element_text(color = "black", size = rel(1.2))
)
print(gg)
pdf(file = paste0(pdf_dir, "basics_meanvar_", m, ".pdf"),
width = 7, height = 5, useDingbats = FALSE)
print(gg)
dev.off()
}rna_hvg <- joint_rna_dt[, c("ens_id", "symbol", "rna_tail_prob", "rna_is_variable",
"rna_mu", "rna_epsilon", "rna_delta")] %>%
setnames(c("symbol"), c("rna_symbol"))
fwrite(rna_hvg, paste0(hits_dir,"/rna_hvg.txt.gz"), sep = "\t", quote = FALSE)# Perform HVF analysis
obj <- readRDS(sprintf("%s/%s_vb.rds", data_dir, opts$anno))
scmet_obj <- scmet_hvf(scmet_obj = obj, delta_e = 0.9, delta_g = NULL, efdr = 0.1)For HVF detection task:
Evidence probability threshold chosen via EFDR valibration is too low.
Probability threshold set automatically equal to 'evidence_thresh'.
641 Features classified as highly variable using:
- The 90 percentile of variable genes
- Evidence threshold = 0.8
- EFDR = 6.74%
- EFNR = 9.78%
# Obtain gene names
gene_metadata <- fread(io$gene.metadata) %>% .[,c("ens_id", "symbol")]
#Highlight top hits for promoters
met_joint <- scmet_obj$hvf$summary %>% as.data.table %>%
setnames("feature_name", "ens_id") %>%
merge(gene_metadata, by = "ens_id") %>%
setorder(-epsilon)mode <- c("epsilon", "gamma")
for (m in mode) {
gg <- scmet_plot_mean_var(obj = scmet_obj, y = m, task = "hvf",
nfeatures = 3000) +
theme(legend.position = c(0.9, 0.1)) +
ggrepel::geom_text_repel(data = head(met_joint, n = 35),
aes_string(x = "mu", y = m, label = "symbol"),
size = 2.3, color = "black")
print(gg)
pdf(paste0(pdf_dir, "/scmet_meanvar_", opts$anno, "_", m, ".pdf"),
width = 7, height = 5, useDingbats = FALSE)
print(gg)
dev.off()
}met_hvf <- scmet_obj$hvf$summary %>%
as.data.table %>% .[, anno := opts$anno] %>%
setnames("feature_name", "ens_id")
fwrite(met_hvf, paste0(hits_dir, "/met_hvf_", opts$anno, ".txt.gz"), sep = "\t", quote = FALSE)## Combine omics
metrna <- merge(met_hvf, rna_hvg, by = "ens_id") %>%
merge(gene_metadata, by = "ens_id")
# Process
metrna[, color := "black"]
metrna[epsilon > 0.5 & rna_epsilon > 1.5, color := "red"]
metrna[epsilon < 0.15 & rna_epsilon > 2, color := "blue"]
metrna <- metrna %>% setorder(-rna_epsilon)
fwrite(metrna, file = paste0(hits_dir, "metrna_variability_", opts$anno, ".csv"),
sep = "\t", quote = FALSE)set.seed(12)
# Marker genes
marker_genes <- c(
"Cubn", "Cer1", "Mixl1", "Lefty2", "Amn", "Cldn6", "Mesp1",
"Krt8", "Apob", "Foxi1", "Aplnr", "Id3", "Peg3", "Dppa5a", "Dppa4",
"Dppa2", "Spp1", "Morc1", "Trap1a", "Pcdh19", "Zfp42", "Fmr1nb"
)
if (opts$anno == "prom_2000_2000") {
main_txt <- "Promoter region"
} else if (opts$anno == "first_exon") {
main_txt <- "First exon"
} else {
main_txt <- "First intron"
}
#main_txt <- ifelse(opts$anno == "prom_2000_2000", "Promoter region", "First exon")
# Create copy
metrna_plot <- copy(metrna)
tmp <- metrna_plot[metrna_plot$color == "black", ]
tmp <- tmp[sample(NROW(tmp), ifelse(opts$anno == "prom_2000_2000", 4000, 1000)), ]
to.plot <- rbind(metrna_plot[metrna_plot$color != "black", ], tmp)
gg <- ggplot(to.plot, aes(x = epsilon, y = rna_epsilon,
alpha = color, fill = color, size = color)) +
labs(x = expression(paste("Residual overdispersion (DNAm)")),
y = "Residual overdispersion (RNA)") +
geom_point(shape = 21, stroke = 0.1, color = "black") +
scale_fill_manual(values = c("black" = "gray80", "red" = "#4E934D", "blue" = "#C400AD")) +
scale_size_manual(values = c("black" = 0.6, "red" = 2, "blue" = 2)) +
scale_alpha_manual(values = c("black" = 0.5, "red" = 0.8, "blue" = 0.8)) +
ggrepel::geom_text_repel(data = to.plot[color == "red" & symbol %in% marker_genes],
aes(x = epsilon, y = rna_epsilon, label = symbol),
size = 5, color = "#4E934D") +
ggrepel::geom_text_repel(data = to.plot[color == "blue" & symbol %in% marker_genes],
aes(x = epsilon, y = rna_epsilon, label = symbol),
size = 5, color = "#C400AD") +
coord_cartesian(xlim = c(-0.9, 1.6)) +
theme_classic() +
ggtitle(main_txt) +
theme(
legend.position = "none",
plot.title = element_text(hjust = 0.5, face = "bold", color = "black", size = rel(1.45)),
strip.text = element_text(color = "black", size = rel(1.1)),
legend.text = element_text(color = "black", size = rel(1.15)),
axis.text = element_text(color = "black", size = rel(1.05)),
axis.title = element_text(color = "black", size = rel(1.3))
)
print(gg)
pdf(paste0(pdf_dir, "/metrna_variability_", opts$anno, ".pdf"), width = 10, height = 4, useDingbats = FALSE)print(gg)
dev.off()quartz_off_screen
2
set.seed(12)
# Marker genes
marker_genes <- c(
"Cubn", "Cer1", "Mixl1", "Lefty2", "Amn", "Cldn6", "Mesp1",
"Krt8", "Apob", "Foxi1", "Aplnr", "Id3", "Peg3", "Dppa5a", "Dppa4",
"Dppa2", "Spp1", "Morc1", "Trap1a", "Pcdh19", "Zfp42", "Fmr1nb"
)
#main_txt <- ifelse(opts$anno == "prom_2000_2000", "Promoter region", "First exon")
if (opts$anno == "prom_2000_2000") {
main_txt <- "Promoter region"
} else if (opts$anno == "first_exon") {
main_txt <- "First exon"
} else {
main_txt <- "First intron"
}
# Create copy
metrna_plot <- copy(metrna)
tmp <- metrna_plot[metrna_plot$color == "black", ]
tmp <- tmp[sample(NROW(tmp), ifelse(opts$anno == "prom_2000_2000", 4000, 1000)), ]
to.plot <- rbind(metrna_plot[metrna_plot$color != "black", ], tmp)
gg <- ggplot(to.plot, aes(x = mu, y = rna_mu,
alpha = color, fill = color, size = color)) +
labs(x = expression(paste("Mean DNA methylation")),
y = "Mean RNA expression") +
geom_point(shape = 21, stroke = 0.1, color = "black") +
scale_fill_manual(values = c("black" = "gray80", "red" = "#4E934D", "blue" = "#C400AD")) +
scale_size_manual(values = c("black" = 0.6, "red" = 2, "blue" = 2)) +
scale_alpha_manual(values = c("black" = 0.5, "red" = 0.8, "blue" = 0.8)) +
ggrepel::geom_text_repel(data = to.plot[color == "red" & symbol %in% marker_genes],
aes(x = mu, y = rna_mu, label = symbol),
size = 5, color = "#4E934D") +
ggrepel::geom_text_repel(data = to.plot[color == "blue" & symbol %in% marker_genes],
aes(x = mu, y = rna_mu, label = symbol),
size = 5, color = "#C400AD") +
annotate("text", y = 9, x = 0.88,label="R = -0.4",hjust = 1) +
#geom_smooth(method=lm, se=FALSE) +
#coord_cartesian(xlim = c(, 1.6)) +
theme_classic() +
ggtitle(main_txt) +
theme(
legend.position = "none",
plot.title = element_text(hjust = 0.5, face = "bold", color = "black", size = rel(1.45)),
strip.text = element_text(color = "black", size = rel(1.1)),
legend.text = element_text(color = "black", size = rel(1.15)),
axis.text = element_text(color = "black", size = rel(1.05)),
axis.title = element_text(color = "black", size = rel(1.3))
)
print(gg)
pdf(paste0(pdf_dir, "/metrna_mean_", opts$anno, ".pdf"), width = 8, height = 5, useDingbats = FALSE)print(gg)
dev.off()quartz_off_screen
2
set.seed(12)
# Get density of points in 2 dimensions.
# @param x A numeric vector.
# @param y A numeric vector.
# @param n Create a square n by n grid to compute density.
# @return The density within each square.
.get_density <- function(x, y, ...) {
dens <- MASS::kde2d(x, y, ...)
ix <- findInterval(x, dens$x)
iy <- findInterval(y, dens$y)
ii <- cbind(ix, iy)
return(dens$z[ii])
}
# ggplot(parameter_df[ind_not_na, ], aes(log10(cv2_scran), log10(delta))) + geom_pointdensity() + scale_colour_viridis(name = "Density")
# library("ggpointdensity")
# library("viridis")
# Marker genes
marker_genes <- c(
"Cubn", "Cer1", "Mixl1", "Lefty2", "Amn", "Cldn6", "Mesp1",
"Krt8", "Apob", "Foxi1", "Aplnr", "Id3", "Peg3", "Dppa5a", "Dppa4",
"Dppa2", "Spp1", "Morc1", "Trap1a", "Pcdh19", "Zfp42", "Fmr1nb"
)
main_txt <- ifelse(opts$anno == "prom_2000_2000", "Promoter region", "First exon")
# Create copy
metrna_plot <- copy(metrna)
tmp <- metrna_plot[metrna_plot$color == "black", ]
#tmp <- tmp[sample(NROW(tmp), ifelse(opts$anno == "prom_2000_2000", 4000, 1000)), ]
to.plot <- metrna_plot# rbind(metrna_plot[metrna_plot$color != "black", ], tmp)
to.plot <- to.plot %>% .[, density := .get_density(log(mu), rna_mu, n = 50)]
gg <- ggplot(to.plot, aes(x = log(mu), y = rna_mu, color = density)) +
labs(x = expression(paste("Log mean DNA methylation")),
y = "Log mean RNA expression") +
geom_point(size = 1) +
viridis::scale_fill_viridis() +
viridis::scale_color_viridis() +
# ggrepel::geom_text_repel(data = to.plot[color == "red" & symbol %in% marker_genes],
# aes(x = log(mu), y = rna_mu, label = symbol),
# size = 5, color = "#4E934D") +
# ggrepel::geom_text_repel(data = to.plot[color == "blue" & symbol %in% marker_genes],
# aes(x = log(mu), y = rna_mu, label = symbol),
# size = 5, color = "#C400AD") +
#annotate("text", y = 9, x = 0.88,label="R = -0.4",hjust = 1) +
annotate("text", y = 9, x = 0,label="R = -0.4",hjust = 1) +
#geom_smooth(method=lm, se=FALSE) +
#coord_cartesian(xlim = c(, 1.6)) +
theme_classic() +
ggtitle(main_txt) +
theme(
legend.position = "none",
plot.title = element_text(hjust = 0.5, face = "bold", color = "black", size = rel(1.45)),
strip.text = element_text(color = "black", size = rel(1.1)),
legend.text = element_text(color = "black", size = rel(1.15)),
axis.text = element_text(color = "black", size = rel(1.05)),
axis.title = element_text(color = "black", size = rel(1.3))
)
print(gg)
pdf(paste0(pdf_dir, "/metrna_mean_heatmap_", opts$anno, ".pdf"), width = 8, height = 5, useDingbats = FALSE)print(gg)
dev.off()quartz_off_screen
2